Overview:

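This code explores the USDA nutrient dataset using principal component analysis (PCA) and visualizes the result as an interactive biplot.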

Read in Data

# Load the raw USDA nutrient table from the project's data/ directory.
usda_nutrients <- read_csv(file = here("data", "usda_nutrients.csv"))
## Rows: 8618 Columns: 45
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (6): FoodGroup, ShortDescrip, Descrip, CommonName, MfgName, ScientificName
## dbl (39): ID, Energy_kcal, Protein_g, Fat_g, Carb_g, Sugar_g, Fiber_g, VitA_...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Clean names, make a subset, and remove NA values

# Standardise the column names to snake_case.
nutrients_clean <- clean_names(usda_nutrients)

# Keep the identifying columns plus the nutrient measurements, then discard
# every row that still has a missing value in any retained column.
nutrients_sub <- drop_na(
  select(nutrients_clean, id:descrip, energy_kcal:zinc_usrda)
)

Principal Component Analysis (PCA)

# Run PCA on the columns energy_kcal through vit_e_mg. The columns are
# centred and scaled with scale() before being handed to prcomp().
nutrients_pca <- prcomp(
  scale(
    select(nutrients_sub, energy_kcal:vit_e_mg)
  )
)

# Print the loadings: one row per input variable, one column per component.
nutrients_pca[["rotation"]]
##                      PC1         PC2        PC3         PC4         PC5
## energy_kcal -0.513047359 -0.10845307  0.3681406  0.09764979 -0.10073217
## protein_g    0.106216801 -0.41818044  0.1429488 -0.27943323 -0.51992426
## fat_g       -0.305743045 -0.19725192  0.5795031  0.08109105  0.19115091
## carb_g      -0.480815803  0.25569405 -0.2593080  0.12601996 -0.21257414
## sugar_g     -0.379833550  0.24137319 -0.2009575  0.30865990 -0.02757043
## fiber_g     -0.318772017  0.07211375 -0.2370528 -0.29450401 -0.31246090
## vit_a_mcg   -0.051098719 -0.43638664 -0.2680348  0.48415461  0.15778289
## vit_b6_mg   -0.231330745 -0.32817492 -0.3156069 -0.42426856 -0.09053081
## vit_b12_mcg  0.006908463 -0.54358508 -0.2398219  0.34445281 -0.09904928
## vit_c_mg    -0.098370888 -0.07922089 -0.3103273 -0.31639506  0.61651851
## vit_e_mg    -0.298494686 -0.21382494  0.1383029 -0.27341944  0.34119835
##                     PC6         PC7         PC8          PC9        PC10
## energy_kcal  0.19821583 -0.16104341  0.08054962  0.002340961 -0.18934413
## protein_g    0.38642173  0.00474741 -0.48293288 -0.167672698 -0.06619486
## fat_g        0.04900534 -0.20134181  0.19287993  0.084332632  0.28854691
## carb_g       0.06325457 -0.02960117  0.05377364 -0.033529032 -0.61464013
## sugar_g      0.32260030  0.40571854 -0.31964678  0.018085293  0.53839257
## fiber_g     -0.47071219 -0.50384337 -0.18218277  0.126106317  0.35633632
## vit_a_mcg   -0.15403397 -0.24102781 -0.07805117 -0.621622084  0.03852800
## vit_b6_mg    0.12043269  0.29882257  0.61820138 -0.180079887  0.18043348
## vit_b12_mcg -0.06992287  0.07594336  0.01787803  0.708752153 -0.07200381
## vit_c_mg     0.45274725 -0.35699596 -0.21358154  0.156222767 -0.07109110
## vit_e_mg    -0.48087979  0.48327964 -0.38810702 -0.042852864 -0.19391741
##                      PC11
## energy_kcal -0.6798830352
## protein_g    0.1730324832
## fat_g        0.5640572148
## carb_g       0.4341101626
## sugar_g     -0.0020713321
## fiber_g     -0.0335934993
## vit_a_mcg   -0.0007360972
## vit_b6_mg   -0.0048013035
## vit_b12_mcg  0.0020251097
## vit_c_mg    -0.0063637235
## vit_e_mg    -0.0023743023
# Print the standard deviation of each principal component.
nutrients_pca[["sdev"]]
##  [1] 1.58016523 1.37968818 1.30323194 1.09246231 0.99829375 0.92498713
##  [7] 0.86997663 0.66285198 0.59544319 0.54839030 0.06279142

Create a biplot

# PCA biplot: observations from nutrients_sub coloured by food group, with
# the variable loadings overlaid and labelled in black.
x <- autoplot(nutrients_pca,
              data = nutrients_sub,
              colour = "food_group",
              loadings = TRUE,
              loadings.colour = "black",
              loadings.label = TRUE,
              loadings.label.colour = "black") +
  # `hide_legend` is not a documented argument of ggfortify's PCA autoplot,
  # so the legend is suppressed through the ggplot2 theme instead.
  ggplot2::theme(legend.position = "none")

# Convert the static ggplot into an interactive plotly widget.
ggplotly(x)